rm(list=ls())Chargement des librairies
library(tidyverse)
library(corrplot)
library(lmerTest)
library(ade4)
library(splines)
library(car)
library(plotly)
library(DT)Prealable avant analyse statistique des donnees
Import des donnees
setwd(".")
load('cham.Rdata')
datatable(cham, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )Creation des variables age et long et integration des deux variables dans le data frame cham2
age<-cham$year-cham$coh
long<-cham$ydth-cham$coh
cham2<-cbind(cham, age, long)Question 1
Représentation graphique des données
Représentation par classe d’age
cham_age<-cham2 %>%
group_by(age) %>%
dplyr::summarise(totnaissance= sum(fec), taillepop=n(), fecperind=totnaissance/n()*100)
plot1 <- ggplot(cham_age, aes(x=age, y=fecperind)) +
geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity") +
labs(title = "Fécondité de la population en fonction de l'age",x="Age", y="Fécondité") +
theme(plot.title = element_text(hjust = 0.5)) +
geom_smooth()
ggplotly(plot1)## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Representation sans grouper par classe d’age pour identifier s’il existe un point d’inflexion
#####Representation avec la fonction jitter
p2<-ggplot(cham2, aes(x=age, y=fec)) + geom_jitter(width = 0.55, height = 0)
p2<-p2 + labs(title = "Fécondité en fonction de l'age",x="Age", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5))
p2 + geom_smooth()## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#####Representation avec la fonction geom_count
p2bis<-ggplot(cham2, aes(x=age, y=fec)) + geom_count()
p2bis<-p2bis + labs(title = "Fécondité en fonction de l'age",x="Age", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5))
p2bis + geom_smooth()## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Modelisation pour verifier s’il existe un lien entre la fécondité annuelle et l’âge de la femelle
Tests de modèles de régression lineaire generalise avec effets aléatoires
First
glm1 <- glmer(fec ~ age + (1| id),data=cham2, family = binomial)
summary(glm1)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1775.1 1790.7 -884.6 1769.1 1325
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6411 -1.1386 0.6819 0.7903 1.0477
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2129 0.4614
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.58024 0.15766 3.680 0.000233 ***
## age -0.01703 0.01560 -1.092 0.275013
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.903
Anova(glm1)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## age 1.1916 1 0.275
surdispersion_glm1 <- 1775/1325;surdispersion_glm1## [1] 1.339623
AIC = 1775 et p-value = 0.27
Pas de surdispersion
observee
Second
glm2 <- glmer(fec ~ year + (1| id),data=cham2, family = binomial)## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glm2)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1776.3 1791.8 -885.1 1770.3 1325
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6666 -1.1423 0.6818 0.7912 1.0706
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2173 0.4662
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.5452438 1.3195532 4.960 7.04e-07 ***
## year -0.0030515 0.0006584 -4.634 3.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## year -0.999
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Third
glm3 <- glmer(fec ~ age+ year + (1| id),data=cham2, family = binomial)## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.292699 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glm3)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + year + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1777.1 1797.9 -884.6 1769.1 1324
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6486 -1.1413 0.6819 0.7907 1.0438
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2128 0.4613
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.8329285 1.3214401 2.144 0.0320 *
## age -0.0167623 0.0155980 -1.075 0.2825
## year -0.0011244 0.0006564 -1.713 0.0867 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age
## age -0.095
## year -0.993 -0.013
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.292699 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Four
glm4 <- glmer(fec ~ age*year + (1| id),data=cham2, family = binomial)## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 3.403 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glm4)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age * year + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1779.1 1805.1 -884.6 1769.1 1323
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6675 -1.1401 0.6837 0.7906 1.0334
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2095 0.4577
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.082e+01 1.319e+00 8.207 2.26e-16 ***
## age -8.721e-01 1.936e-02 -45.053 < 2e-16 ***
## year -5.109e-03 6.551e-04 -7.799 6.23e-15 ***
## age:year 4.265e-04 8.441e-06 50.533 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age year
## age -0.073
## year -0.993 -0.002
## age:year -0.007 -0.593 -0.014
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 3.403 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Five
glm5<- glmer(fec ~ age + (1| id) + (1| year),data=cham2, family = binomial)
summary(glm5)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ age + (1 | id) + (1 | year)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1750.5 1771.3 -871.2 1742.5 1324
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.9922 -1.0487 0.6251 0.7479 1.4398
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2402 0.4901
## year (Intercept) 0.2214 0.4705
## Number of obs: 1328, groups: id, 217; year, 27
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.65014 0.18973 3.427 0.000611 ***
## age -0.02212 0.01624 -1.362 0.173287
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## age -0.779
Anova(glm5)## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: fec
## Chisq Df Pr(>Chisq)
## age 1.8543 1 0.1733
surdispersion_glm5 <- 1750/1324;surdispersion_glm5## [1] 1.321752
AIC = 1750 et p-value = 0.17
Pas de surdispersion
observee
Question 2
Représentation graphique des données
Création tableau de la fécondité moyenne par année
cham_ans = cham2 %>%
group_by(year) %>%
dplyr::summarise(totnaissance= sum(fec), taillepop=n(), agemoyen=mean(age)) %>%
mutate(fecperind=totnaissance/taillepop)%>%
mutate(ratiofecage=fecperind/agemoyen)Représentation graphique sans grouper par annee
p3<-ggplot(cham2, aes(x=year, y=fec)) + geom_jitter(width = 0.55, height = 0);p3
p3bis<-ggplot(cham2, aes(x=year, y=fec)) + geom_count()
p3bis<-p3bis + labs(title = "Fécondité en fonction des annees",x="Anees", y="Fécondité") + theme(plot.title = element_text(hjust = 0.5)); p3bis
p4<-ggplot(data = cham_ans, aes(x = year,y=agemoyen))+geom_point(); p4Représentation graphique par annee
p5<-ggplot(cham_ans, aes(x=year, y=fecperind)) + geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p5
p5 + geom_smooth() ## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
p6<-ggplot(data = cham_ans, aes(x = year,y=agemoyen))+geom_point(); p6Modelisation pour verifier s’il existe un lien entre la fécondité annuelle et le temps
Tests de modèles de régression lineaire generalise avec effets aléatoires
Modele GLM fecondite en fonction de l’annee
glm6 <- glmer(fec ~ year + (1| id),data=cham2, family = binomial)## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(glm6)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1776.3 1791.8 -885.1 1770.3 1325
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6666 -1.1423 0.6818 0.7912 1.0706
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2173 0.4662
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.5452438 1.3195532 4.960 7.04e-07 ***
## year -0.0030515 0.0006584 -4.634 3.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## year -0.999
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.290051 (tol = 0.002, component 1)
## Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
Transformation de la variable year en variable normee reduite
year_scale<-scale(cham2$year, center=TRUE, scale= TRUE)Test à nouveau des modeles GLM
glm7 <- glmer(fec ~ year_scale + (1| id),data=cham2, family = binomial)
summary(glm7)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year_scale + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1776.3 1791.8 -885.1 1770.3 1325
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6666 -1.1423 0.6818 0.7912 1.0706
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2173 0.4662
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.42505 0.06783 6.266 3.7e-10 ***
## year_scale -0.01823 0.06530 -0.279 0.78
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## year_scale -0.001
surdispersion_glm7 <- 1776/1325;surdispersion_glm7## [1] 1.340377
AIC = 1776 et p-value = 0.78
Pas de surdispersion
observee
glm8 <- glmer(fec ~ year_scale+age + (1| id),data=cham2, family = binomial)
summary(glm8)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year_scale + age + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1777.1 1797.9 -884.6 1769.1 1324
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6486 -1.1413 0.6819 0.7907 1.0438
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2128 0.4613
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.577851 0.159379 3.626 0.000288 ***
## year_scale -0.006749 0.066039 -0.102 0.918604
## age -0.016762 0.015809 -1.060 0.289001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) yr_scl
## year_scale 0.146
## age -0.905 -0.163
surdispersion_glm8 <- 1777/1324;surdispersion_glm8## [1] 1.342145
glm9 <- glmer(fec ~ year_scale*age + (1| id),data=cham2, family = binomial)
summary(glm9)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: fec ~ year_scale * age + (1 | id)
## Data: cham2
##
## AIC BIC logLik deviance df.resid
## 1779.1 1805.1 -884.6 1769.1 1323
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6689 -1.1399 0.6839 0.7907 1.0327
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 0.2092 0.4574
## Number of obs: 1328, groups: id, 217
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.575862 0.159548 3.609 0.000307 ***
## year_scale -0.032348 0.161967 -0.200 0.841699
## age -0.016602 0.015815 -1.050 0.293816
## year_scale:age 0.002755 0.015923 0.173 0.862625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) yr_scl age
## year_scale 0.124
## age -0.906 -0.119
## year_scal:g -0.072 -0.913 0.059
surdispersion_glm9 <- 1779/1323;surdispersion_glm9## [1] 1.344671
Question 3
Représentation graphique des données
Création tableau de la fécondité totale par chamois
cham_id = cham2 %>%
group_by(id) %>%
dplyr::summarise(feconditetotale= sum(fec), long=long, anneetot=(ydth-min(year)+1)) %>%
unique()%>%
mutate(fecrelative=feconditetotale/anneetot)%>%
drop_na(long)## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
Représentation graphique par id
p7<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p7
p7 + geom_smooth() ## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
p8<-ggplot(data = cham_id, aes(x=long, y=fecrelative))+geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7), stat = "identity"); p8
p9<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_count(); p9
p10<-ggplot(cham_id, aes(x=long, y=feconditetotale)) + geom_jitter(width = 0.25, height = 0.25)+ geom_smooth(); p10## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'